Read in carbonate chemistry data (via Ana P) and proportional time calculations (via Thomas D)

CC = read.csv('/Users/heidi.k.hirsh/Desktop/FLK_data/CCflk_plusBathy.csv') #make sure this is the most appropriate file
# CC = read.csv('/Users/heidi.k.hirsh/Documents/GitHub/test-flk/CCflkplusnuts.csv') #read in the version that has nutrients (both spatial and direct match) this only has 14 days so maybe I should use a version without ndays
dim(CC)/7/2 # 613.000000   8.714286
## [1] 115.0714   4.5000
ptime = st_read('/Users/heidi.k.hirsh/Desktop/FLK_data/BowtieFractions_Jan10.shp')
## Reading layer `BowtieFractions_Jan10' from data source 
##   `/Users/heidi.k.hirsh/Desktop/FLK_data/BowtieFractions_Jan10.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 38500 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -81.83393 ymin: 24.39515 xmax: -80.04417 ymax: 25.6475
## Geodetic CRS:  WGS 84
names(ptime)
## [1] "simu"     "sampl_d"  "ndays"    "nrth_fr"  "sth_frc"  "geometry"
## Rename (why do names change with the original save?)
ptime=ptime %>% rename(sample_id=sampl_d,north_fraction=nrth_fr,south_fraction=sth_frc) #rename fraction columns
names(ptime)
## [1] "simu"           "sample_id"      "ndays"          "north_fraction"
## [5] "south_fraction" "geometry"
## Reorder regions and zones to make sense for plotting
# CC$Sub_region = factor(CC$Sub_region, levels = c("BB","UK","MK","LK"))
CC$Sub_region = factor(CC$Sub_region, levels = c("LK","MK","UK","BB"))
CC$Zone = factor(CC$Zone, levels = c("Inshore","Mid channel","Offshore","Oceanic"))
## Assign each month by number (so months don't plot misleadingly in alphabetical order and years are comparable)
head(CC$UTCDate_Time)
## [1] "2010-03-08 12:26:00" "2010-03-08 13:05:00" "2010-03-08 17:17:00"
## [4] "2010-03-08 17:42:00" "2010-03-08 17:45:00" "2010-03-08 18:22:00"
CC$date= as.Date(CC$Date,format="%Y-%m-%d")
CC$month=format(CC$date, format="%B")
CC$month2=format(CC$date, format="%m")
CC$month2=as.numeric(CC$month2)
head(CC$month2)
## [1] 3 3 3 3 3 3
# above code is required if building off of CC (without nutrients)
## Define visit IDs for matching fractions with carbonate chemistry metadata
CC$visitID_ch1 =  paste(CC$SiteID,CC$UTCDate_Time)
head(CC$visitID_ch1)
## [1] "1 2010-03-08 12:26:00"   "3 2010-03-08 13:05:00"  
## [3] "4 2010-03-08 17:17:00"   "5 2010-03-08 17:42:00"  
## [5] "5.5 2010-03-08 17:45:00" "6 2010-03-08 18:22:00"
CC$visitID_ch2 =  gsub(" ", "_" , CC$visitID_ch1, perl=TRUE)
head(CC$visitID_ch2)
## [1] "1_2010-03-08_12:26:00"   "3_2010-03-08_13:05:00"  
## [3] "4_2010-03-08_17:17:00"   "5_2010-03-08_17:42:00"  
## [5] "5.5_2010-03-08_17:45:00" "6_2010-03-08_18:22:00"
CC$visitID =  gsub("[: -]", "" , CC$visitID_ch2, perl=TRUE)
head(CC$visitID)
## [1] "1_20100308_122600"   "3_20100308_130500"   "4_20100308_171700"  
## [4] "5_20100308_174200"   "5.5_20100308_174500" "6_20100308_182200"
# CC already has this if it is the nutrients version
# CC$visitID=CC$visitID.x
# head(ptime)
# length(unique(CC$visitID)) #1610 #CC has more unique IDs. Why?
# length(unique(ptime$sample_id)) #1375
# dim(CC) #1611   69 
# dim(ptime)/14/2 #multiplied by ndays and forward/backward ==> 1375, same as unique sample_id names
ptime$visitID = ptime$sample_id
# head(ptime)
## Merge data with left_join
CC_frac=NULL
CC_frac = left_join(ptime, CC, by="visitID")
dim(CC_frac)
## [1] 38500    75
#save CC_frac
write.csv(CC_frac, file='/Users/heidi.k.hirsh/Documents/GitHub/test-flk/CCfractions.csv')

names(CC_frac)
##  [1] "simu"             "sample_id"        "ndays"            "north_fraction"  
##  [5] "south_fraction"   "visitID"          "X.1"              "X"               
##  [9] "CTDID"            "Region"           "Year"             "Mission"         
## [13] "Location"         "Latitude"         "Longitude"        "UTCDate"         
## [17] "UTCTime"          "Sample_Depth_m"   "DIC_umol_kg"      "TA_umol_kg"      
## [21] "pH_measured"      "pH_calculated"    "pCO2_uatm"        "Aragonite_Sat_W" 
## [25] "Salinity_Bottle"  "Conductivity_Sm"  "Salinity_CTD"     "Temperature_C"   
## [29] "Pressure_db"      "Density_Sigmat"   "SiteID"           "Survey_design"   
## [33] "Sample_frequency" "UTCDate_Time"     "datetime"         "ESTDate"         
## [37] "ESTTime"          "Month"            "Months"           "MY"              
## [41] "MoY"              "ToD"              "Season"           "Precipitation"   
## [45] "dec.lon"          "dec.lat"          "Zone"             "Sub_region"      
## [49] "Reference"        "Transect"         "Extreme"          "BestSalinity"    
## [53] "n35TA"            "n35DIC"           "TA_0"             "DIC_0"           
## [57] "nTA"              "nDIC"             "Date"             "TIMESTAMP_UTC"   
## [61] "TIMESTAMP_LST"    "jday.utc"         "jday.lst"         "hrod.lst"        
## [65] "PAR_MODIS_DAILY"  "PAR_MODIS_8DAY"   "PAR_MODIS_MON"    "pointDepth"      
## [69] "resolution"       "date"             "month"            "month2"          
## [73] "visitID_ch1"      "visitID_ch2"      "geometry"
dim(CC_frac)/14/2
## [1] 1375.000000    2.678571
## exclude the forward bowtie data
CCb_frac=subset(CC_frac,simu=='backward') #19250    72

ploThis=CCb_frac
## Bar graph to compare spatial (facet by regions and zones) and temporal (xaxis = Month, facet by year) patterns in north fraction
#problems: 
#does not separate out ndays and regions at the same time
#cannot distinguish no data versus 0/low fractions
#alphabetical months on xaxis = yikes

ggplot(data=ploThis, aes(fill=Zone, y=north_fraction, x=Month))+
  geom_bar(position="dodge",stat="identity")+
  facet_wrap(~ndays,ncol=2)+
  theme_bw()

ggplot(data=ploThis, aes(fill=Zone, y=north_fraction, x=Month))+
  geom_bar(position="dodge",stat="identity")+
  facet_wrap(~Sub_region,ncol=2)+
  theme_bw()

ggplot(data=ploThis, aes(fill=Zone, y=north_fraction, x=Month))+
  geom_bar(position="dodge",stat="identity")+
  facet_grid(Year ~ Sub_region)+
  theme_bw()

ggplot(data=ploThis, aes(x=UTCDate_Time,y=north_fraction,color=as.factor(Year)))+
  geom_point()+
  facet_grid(Zone ~ Sub_region)+
  theme_bw()

#yikes. no.

ggplot(data=ploThis, aes(x=month2,y=north_fraction,color=as.factor(ndays)))+
  geom_point()+
  facet_grid(Zone ~ Sub_region)+
  theme_bw()

#problems: 
#patterns are impossible
#decimal months
#ndays color scale is bad (lows and high number of days look the same)
Zones=c('Inshore', 'Mid channel', 'Offshore', 'Oceanic')
Regions=c('BB', 'UK', 'MK', 'LK')
Years=unique(CCb_frac$Year)
Ndays=unique(CCb_frac$ndays)

#loop through each year, plot, and save
#plot a line for each month showing change in fraction with ndays (xaxis)

y_i=1
# for (y_i in 1:length(Years)) {
  plot_year= Years[y_i]
  ploThis = subset(CCb_frac, Year==plot_year)

yearFrac = ggplot(data=ploThis, aes(y=north_fraction, x=ndays))+
  # geom_point(aes(color=month2))+
  geom_line(aes(color=month2,group=visitID))+ 
  facet_grid(Zone ~ Sub_region)+
  ggtitle(plot_year)+
  theme_bw()
yearFrac

# ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/",plot_year,"_northFraction_month.png"),plot=yearFrac,width = 10, height = 8, dpi = 300)
# }

#problems: 
#number of months is not consistent so impossible to compare between plots
#not intuitive for ndays to be xaxis
#loop through regions to plot north fraction by month (for selected nday value)
#I don't know why I thought this would be helpful
#also decimal months again
#impossible to distinguish blues for years

ploThis=subset(CCb_frac,ndays==3)
r_i=3
# for (r_i in 1:length(Regions)) {
  plot_region= Regions[r_i]
  ploThis = subset(ploThis, Sub_region==plot_region)
  
  regFrac = ggplot(data=ploThis, aes(y=north_fraction, x=month2))+
    geom_point(aes(color=Year))+
    geom_line(aes(color=Year,group=interaction(SiteID,Year)))+ 
    facet_grid(Zone ~ Sub_region)+
    ggtitle(plot_region)+
    theme_bw()
  regFrac 

  # ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/",plot_year,"_northFraction_month.png"),plot=yearFrac,width = 10, height = 8, dpi = 300)
# }
ploThis=subset(CCb_frac,ndays==3)

ggplot(data=ploThis, aes(y=north_fraction, x=month2))+
  geom_point(aes(color=Year))+
  geom_line(aes(color=Year,group=interaction(SiteID,Year)))+ 
  facet_grid(Zone ~ Sub_region)+
  theme_bw()

#this is just ndays=3. 
##Loop through and plot annual (monthly mean) lines for each ndays value

#loop through and plot annual lines for each number of days 
pal <- wes_palette("Zissou1", n=9,type = "continuous") #cannot make a discrete palette with more than 5? colors
pal

unique(CCb_frac$Year )
## [1] 2012 2014 2015 2016 2017 2018 2019 2020 2021
#loop through ndays to compare annual trends in space and time
i=1
# for (i in 1:length(Ndays)) {
  plot_n= Ndays[i]
  ploThis = subset(CCb_frac, ndays==plot_n)

  nFrac = ggplot(data=ploThis, aes(y=north_fraction, x=month2))+
    geom_point(aes(color=Year))+
    geom_line(aes(color=Year,group=interaction(SiteID,Year)))+ 
    # scale_colour_continuous(colours = pal,breaks=seq(2012,2021,1)) +   #trying to make a continuous color palette discrete (didn't work)
    scale_colour_gradientn(colours = pal) +
    facet_grid(Zone ~ Sub_region)+
    ggtitle(paste('ndays =',plot_n))+
    scale_x_continuous(breaks=c(1:12,1))+
    theme_bw()
  nFrac 

  # ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/",plot_n,"_northFraction_ndays_zis.png"),plot=nFrac,width = 10, height = 8, dpi = 300)
# }
## Try violin plot (thank you Michael)
i=1
# for (i in 1:length(Ndays)) {
  plot_n= Ndays[i]
  ploThis = subset(CCb_frac, ndays==plot_n)
  
  nFrac = ggplot(data=ploThis, aes(x=month2,y=north_fraction))+
    geom_violin(aes(group=month2))+
    stat_summary(fun.y=mean, geom="line", color='red', size=1)+
    stat_summary(fun.y=mean, geom="point", color='black', size=2)+
    # geom_boxplot(aes(group=month2),width=0.1)+ #puts boxplot inside the violin (too busy)
    facet_grid(Zone ~ Sub_region)+
    ggtitle(paste('ndays =',plot_n))+
    scale_x_continuous(breaks=c(1:12,1))+
    theme_bw()
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
  nFrac 
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.

  # ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/yearViolins2/",plot_n,"_northFraction_ndaysViolin.png"),plot=nFrac,width = 10, height = 8, dpi = 300)
# }

#Figure out how to label number of samples: https://stackoverflow.com/questions/65254425/is-there-a-way-for-adding-labels-with-number-of-observations-per-value-in-violin

#Plot seasonal nutrients

## COMBINE ALL DATA IN ONE DATAFRAME!
CCflk =  read.csv('/Users/heidi.k.hirsh/Desktop/.csv') #new file with (spatial) nutrients added 
unique(CCflk$ndays) #1-7
## [1] 7 6 5 4 3 2 1
CCflk$Sub_region = factor(CCflk$Sub_region, levels = c("LK","MK","UK","BB"))
CCflk$Zone = factor(CCflk$Zone, levels = c("Inshore","Mid channel","Offshore","Oceanic"))

CCflk$date= as.Date(CCflk$Date,format="%Y-%m-%d")
CCflk$month=format(CCflk$date, format="%B")
CCflk$month2=format(CCflk$date, format="%m")
CCflk$month2=as.numeric(CCflk$month2)
head(CCflk$month2)
## [1] 2 2 2 2 2 2
i=1
yval= 'NO3'

for (i in 1:length(unique(CCflk$ndays))) {
  plot_n= i
  ploThis = subset(CCb_frac, ndays==plot_n)

  nNuts = ggplot(data=CCflk, aes(y=NO3, x=month2))+
    geom_point(aes(color=Year))+
    geom_line(aes(color=Year,group=interaction(SiteID,Year)))+   #how are there multiple points for each siteID, year, and month
    scale_colour_gradientn(colours = pal) +
    facet_grid(Zone ~ Sub_region)+
    ggtitle(paste('ndays =',plot_n))+
    scale_x_continuous(breaks=c(1:12,1))+
    theme_bw()
  nNuts
  
  # ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/",plot_n,"_",yval,"_Nutrients_ndaysLine.png"),plot=nNuts,width = 10, height = 8, dpi = 300)
  }
i=1
yval= 'NO3'

for (i in 1:length(unique(CCflk$ndays))) {
  plot_n= i

  nNuts = ggplot(data=CCflk, aes(y=NH4, x=month2))+
    geom_boxplot(aes(group=month2))+
    # geom_violin(aes(group=month2))+
    stat_summary(fun.y=mean, geom="line", color='red', size=1)+
    stat_summary(fun.y=mean, geom="point", color='black', size=2)+
    facet_grid(Zone ~ Sub_region)+
    ggtitle(paste('ndays =',plot_n))+
    scale_x_continuous(breaks=c(1:12,1))+
    theme_bw()
  nNuts
  
  # ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/SeasonalNutrients/",plot_n,"_",yval,"_Nutrients_ndaysBox.png"),plot=nNuts,width = 10, height = 8, dpi = 300)
  }
#chla (or other nutrient) regional, zone, year, month variability

chla_var = ggplot(data=CCflk)+
  geom_smooth(aes(x=month2, y=Chla, colour=Sub_region),
              span=0.8, alpha=0.2, se=F)+
  stat_summary(aes(x=month2, y=Chla, colour=Sub_region),
               fun.data='mean_cl_boot', geom='errorbar',
               width=0.2, position= position_dodge(width=0.3))+
  stat_summary(aes(x=month2, y=Chla, colour=Sub_region),
               fun.data='mean_cl_boot', geom='point',
               width=0.2, position= position_dodge(width=0.3))+
  facet_wrap(CCflk$Zone)+
  scale_x_continuous(breaks=c(1:12,1))+
  theme_bw()
## Warning in stat_summary(aes(x = month2, y = Chla, colour = Sub_region), :
## Ignoring unknown parameters: `width`
chla_var
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 2528 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2528 rows containing non-finite values (`stat_summary()`).
## Removed 2528 rows containing non-finite values (`stat_summary()`).